Libraries Used

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(here)
## Warning: package 'here' was built under R version 4.1.3
## here() starts at C:/Users/mahri/OneDrive/CodeClan/rshiny_dashboard_project/Work In Progress/Demographics
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ggplot2)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(stringr)


Read In Data

hosp_activity_agesex <- read_csv(here("../../raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_age_and_sex.csv"))
## Rows: 129599 Columns: 18
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (12): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl  (6): Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, LengthOf...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
hosp_activity_simd <- 
  read_csv(here("../../raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_and_simd.csv"))
## Rows: 40894 Columns: 18
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl  (7): SIMD, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, Le...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.


Clean Data

hosp_activity_agesex <- janitor::clean_names(hosp_activity_agesex)
# glimpse(hosp_activity_agesex)

hosp_activity_simd <- janitor::clean_names(hosp_activity_simd)
# glimpse(hosp_acivity_simd)


We only need ACUTE patients, what do we have:

admission_type: * Elective Inpatients (not acute) * Emergency Inpatients
* Transfers (not acute)
* All Day cases (not acute)
* All Inpatients (not necasseraly acute)
* All Inpatients and Day cases (not acute)
* Not Specified (not acute???)

hosp_activity_agesex %>% 
  group_by(admission_type) %>% 
  summarise(total = n())
hosp_activity_simd %>% 
  group_by(admission_type) %>% 
  summarise(total = n())


removing those that aren’t acute (see above)

hosp_acute_activity_agesex <- hosp_activity_agesex %>% 
  filter(admission_type == "Emergency Inpatients")

hosp_acute_activity_simd <- hosp_activity_simd %>% 
  filter(admission_type == "Emergency Inpatients")

Seeing what we’re working with:

Age

hosp_acute_activity_agesex %>% 
  group_by(age) %>% 
  summarise(total_patients_in_age_group = n())
# 10 year increments i.e. 0-9, 10-19... 80-89, 90 years and over
# No NA values

hosp_acute_activity_agesex %>% 
  mutate(age = is.na(age)) %>% 
  filter(age == TRUE)

Sex

hosp_acute_activity_agesex %>% 
  group_by(sex) %>% 
  summarise(total_patients_in_sex_group = n())
# Female: 9,832   Male: 9,799 
# No NA values

hosp_acute_activity_agesex %>% 
  mutate(sex = is.na(age)) %>% 
  filter(sex == TRUE)

SIMD

hosp_acute_activity_simd %>% 
  group_by(simd) %>% 
  summarise(total = n())
#There are 962 NAs and ~ 1000 of levels 1:5... remember to tidy this in analysis


quarter_dates <- c("2016Q3", "2016Q4", "2017Q1", "2017Q2", "2017Q3", "2017Q4", 
                   "2018Q1", "2018Q2", "2018Q3", "2018Q4", "2019Q1", "2019Q2", 
                   "2019Q3", "2019Q4", "2020Q1", "2020Q2", "2020Q3", "2020Q4",
                   "2021Q1", "2021Q2", "2021Q3")


Graphs

Remember there were 7 admission types… I chose to keep one.

Might want to make use of this if we’re not sure about admission types: acute_target <- c("Emergency Inpatients", "Not Specified")

Others = Elective Inpatients, Transfers, All Day cases, All Inpatients, All Inpatients and Day cases, Not Specified


Graphs of general admissions and sex

Sex against time and admissions

# do I want to use total length of stay or total average_length_of_stay
# need a total column: 
# sometimes stay = 0, and length of stay = 5

hosp_acute_activity_agesex <- hosp_acute_activity_agesex %>% 
  group_by(sex, quarter) %>% 
  mutate(total_stays_per_quarter_sex = sum(stays))
hosp_acute_activity_agesex

ALSO WANT TOTALS FOR AGE BY QUARTER

hosp_acute_activity_agesex <- hosp_acute_activity_agesex %>% 
  group_by(age, quarter) %>% 
  mutate(total_stays_per_quarter_age = sum(stays))
hosp_acute_activity_agesex

Sex and stays

hosp_acute_activity_agesex %>% 
  ggplot()+
  aes(x = quarter, 
      y = total_stays_per_quarter_sex, 
      group = sex, colour = sex) +
  geom_line() + 
  labs(x = "Yearly Quarter", 
       y = "Total Stays", 
       title = "Total Emergency Inpatient Stays Across Scotland by Sex",
       subtitle = "Q3, 2016 - Q3, 2021", 
       colour = "Sex") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))


AGE GROUPS AND TOTAL STAYS

hosp_acute_activity_agesex %>% 
  ggplot()+
  aes(x = quarter, 
      y = total_stays_per_quarter_age, 
      group = age, colour = age) +
  geom_point()+
  geom_line() + 
  labs(x = "Yearly Quarter", 
       y = "Total Stays", 
       title = "Total Emergency Inpatient Stays Across Scotland by Age Group",
       subtitle = "Q3, 2016 - Q3, 2021", 
       colour = "Age") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))

SIMD - deprivation level data

SIMD (Scottish Index of Multiple Deprivation)

hosp_acute_activity_simd %>% 
  group_by(simd) %>% 
  summarise(total = n())
#There are 962 NAs and ~ 1000 of levels 1:5... remember to tidy this in analysis


Drop NAs

hosp_acute_activity_simd_no_na <- hosp_acute_activity_simd %>% 
  drop_na(simd)
hosp_acute_activity_simd_no_na
hosp_acute_activity_simd_no_na_simd <- hosp_acute_activity_simd_no_na %>% 
  group_by(simd, quarter) %>% 
  mutate(total_stays_per_quarter_simd = sum(stays))
hosp_acute_activity_simd_no_na_simd
hosp_acute_activity_simd_no_na_simd %>%
     mutate(simd = fct_relevel(as.character(simd, 
                                  "1", "2", "3", "4", "5"))) %>%
  ggplot()+
  aes(x = quarter, 
      y = total_stays_per_quarter_simd, 
      group = simd, colour = simd) +
  geom_point()+
  geom_line() + 
  labs(x = "Yearly Quarter", 
       y = "Total Stays", 
       title = "Total Emergency Inpatient Stays Across Scotland by SIMD Level",
       subtitle = "Q3, 2016 - Q3, 2021", 
       colour = "SIMD Level:
       1 = Most Deprived
       5 = Least Deprived") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))

EPISODES

hosp_acute_activity_simd_no_na_simd <- hosp_acute_activity_simd_no_na %>% 
  group_by(simd, quarter) %>% 
  mutate(total_episodes_per_quarter_simd = sum(episodes))
hosp_acute_activity_simd_no_na_simd
hosp_acute_activity_simd_no_na_simd %>%
     mutate(simd = fct_relevel(as.character(simd, 
                                  "1", "2", "3", "4", "5"))) %>%
  ggplot()+
  aes(x = quarter, 
      y = total_episodes_per_quarter_simd, 
      group = simd, colour = simd) +
  geom_point()+
  geom_line() + 
  labs(x = "Yearly Quarter", 
       y = "Total Stays", 
       title = "Total Emergency Inpatient Episodes Across Scotland by SIMD Level",
       subtitle = "Q3, 2016 - Q3, 2021", 
       colour = "SIMD Level:
       1 = Most Deprived
       5 = Least Deprived") +
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust=0.9))